#install.packages("rlang")
#library(rlang)
library(tidyverse)
library(haven)
library(formatR)
library(lubridate)
library(smooth)
library(forecast)
library(scales)
library(ggplot2)
library(readxl)
library(tidyverse)
library(data.table)
library(quantmod)
library(geofacet)
library(janitor)
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE
)
Combine past years: All revenue files are in a revenue
folder that I reference when I set the working directory. When adding
new fiscal years, put the the newest year of data for revenue and
expenditures in their respective folders.
Pre-FY2022
The code below chunk takes the .dta files for all fiscal years before FY 2022 and binds them together. Variable names were manually changed by past researchers so that they were consistent across years.
Additional variables are created: object, category, sequence, type, trans_agency, trans_type
trans_agency and trans_type are only for transfers. You can search for “transfers” under the variable “org_name”
setwd("C:/Users/aleaw/OneDrive/Desktop/PhD Fall 2021 - Spring 2022/Merriman RA/Fiscal Futures FY2022/FY2021 replication/revenue")
# does all of stata code lines 1-514 of combining yearly data
allrevfiles = list.files(path = "C:/Users/aleaw/OneDrive/Desktop/PhD Fall 2021 - Spring 2022/Merriman RA/Fiscal Futures FY2022/FY2021 replication/revenue", pattern = ".dta") %>% lapply(read_dta) %>% bind_rows
#Fy21: 62295 observations, 13 variables
#FY22: 65094 obs, 13 vars
#write_csv(allrevfiles, "allrevfiles.csv")
Reads in dta file and leaves fund as a character. No longer have to worry about preserving leading zeros in categories like the fund numbers. State code used to force fund, source, and from_fund to be 4 digits long and preserve leading zeros and fund was 3 digits long with leading zeros.
setwd("C:/Users/aleaw/OneDrive/Desktop/PhD Fall 2021 - Spring 2022/Merriman RA/Fiscal Futures FY2022/FY2021 replication/expenditures")
allexpfiles = list.files(path = "C:/Users/aleaw/OneDrive/Desktop/PhD Fall 2021 - Spring 2022/Merriman RA/Fiscal Futures FY2022/FY2021 replication/expenditures", pattern = ".dta") %>% lapply(read_dta) %>% bind_rows
#fy21 213372 observations, 20 variables
# fy22 225587 obs, 21 vars.
#write_csv(allexpfiles, "allexpfiles.csv")
Code below reads in the csv files created in chunks above (allrevfiles.csv and allrexpfiles.csv). These files contain all years of data combined into one file BEFORE any recoding is done. Do not use this file for summing categories because it is just an in between step before recoding revenue and expenditure categories.
# combined in past chunks called create-rev-csv and create-exp-csv
allrevfiles <- read_csv("allrevfiles.csv") #combined but not recoded
allexpfiles <- read_csv("allexpfiles.csv") #combined but not recoded
Normally, when your receive the new fiscal year files from the Comptrollers office, you will need to change the variable names so that they are consistent with past years. This is an example of reading in the new file and changing the variable names.
For FY 2022 and after, .dta files can be avoided entirely and .csv files and R code will be used.All files before this year had been saved and passed on as .dta files for Stata code before the transition to R in Fall 2022
Example code below: Read in excel file and rename columns so that it plays well with the other years’ files.
read_xlsx("Fis_Fut_Rev_2022.xlsx") %>%
rename(fy = 'FISCAL YEAR',
fund = 'FUND #',
fund_name = 'FUND NAME',
agency = 'AGENCY #',
agency_name = 'AGENCY NAME',
source = 'REVENUE SOURCE #',
source_name = 'REV SRC NAME',
receipts = 'REVENUE YTD AMOUNT'
) %>%
# do these come from funds_ab_whatever file?
mutate(fund_cat = FIND_COLUMN, #create fund_cat column
fund_cat_name = FIND_NAME) # create fund_cat_name column
Identify new and reused funds for newest fiscal year. Recode funds to take into account different fund numbers/names over the years. Update fund_ab_in_2021.xlsx with any changes from previous fiscal year.
Clarify and add steps for identifying new and reused funds.
For funds that were reused once, a 9 replaces the 0 as the first
digit. If reused twice, then the first two values are 10.
- Ex. 0350 –> 9350 because its use changed.
- Ex. 0367 becomes 10367 because its use has changed twice now. There
was fund 0367 originally, then its use changed and it was recoded as
9367, and now it changed again so it is a 10367.
# if first character is a 0, replace with a 9
rev_1998_2022 <- allrevfiles %>%
mutate(fund = ifelse(fy < 2002 & fund %in% c("0730", "0241", "0350", "0367", "0381", "0382", "0526", "0603", "0734", "0913", "0379"), str_replace(fund, "0","9"), fund)) %>%
mutate(fund = ifelse(fy < 2008 & fund %in% c("0027", "0033", "0037", "0058", "0062", "0066", "0075", "0083", "0116", "0119", "0120", "0122", "0148", "0149", "0157", "0158", "0166", "0194", "0201", "0209", "0211", "0217", "0223", "0231", "0234", "0253", "0320", "0503", "0505", "0512", "0516", "0531", "0532", "0533", "0547", "0563", "0579", "0591", "0606", "0616", "0624", "0659", "0662", "0665", "0676", "0710",
"0068", "0076", "0115", "0119", "0168", "0182", "0199", "0241", "0307", "0506", "0509", "0513"), str_replace(fund, "0","9"), fund)) %>%
mutate(fund = ifelse(fy < 2016 & fund %in% c("0263", "0399", "0409"), str_replace(fund, "0","9"), fund)) %>%
mutate(fund = ifelse(fy < 2017 & fund == "0364", str_replace(fund, "0","9"), fund)) %>%
mutate(fund = ifelse(fy < 2018 & fund %in% c("0818", "0767", "0671", "0593", "0578"), str_replace(fund, "0","9"), fund)) %>%
mutate(fund = ifelse(fy>1999 & fy < 2018 & fund == "0231", "10231", fund) ) %>%
mutate(fund = ifelse(fy < 2019 & fund %in% c("0161", "0489", "0500", "0612", "0893", "0766"), str_replace(fund, "0","9"), fund)) %>%
mutate(fund = ifelse(fy < 2020 & fund %in% c("0254", "0304", "0324", "0610", "0887", "0908", "0939", "0968"), str_replace(fund, "0","9"), fund)) %>%
mutate(fund = ifelse(fy < 2021 & fund %in% c("0255", "0325", "0348", "0967", "0972"), str_replace(fund, "0","9"), fund))
# if first character is a 0, replace with a 9
exp_1998_2022 <- allexpfiles %>%
mutate(fund = ifelse(fy < 2002 & fund %in% c("0730", "0241", "0350", "0367", "0381", "0382", "0526", "0603", "0734", "0913", "0379"), str_replace(fund, "0","9"), fund)) %>%
mutate(fund = ifelse(fy < 2008 & fund %in% c("0027", "0033", "0037", "0058", "0062", "0066", "0075", "0083", "0116", "0119", "0120", "0122", "0148", "0149", "0157", "0158", "0166", "0194", "0201", "0209", "0211", "0217", "0223", "0231", "0234", "0253", "0320", "0503", "0505", "0512", "0516", "0531", "0532", "0533", "0547", "0563", "0579", "0591", "0606", "0616", "0624", "0659", "0662", "0665", "0676", "0710",
"0068", "0076", "0115", "0119", "0168", "0182", "0199", "0241", "0307", "0506", "0509", "0513"), str_replace(fund, "0","9"), fund)) %>%
mutate(fund = ifelse(fy < 2016 & fund %in% c("0263", "0399", "0409"), str_replace(fund, "0","9"), fund)) %>%
mutate(fund = ifelse(fy < 2017 & fund == "0364", str_replace(fund, "0","9"), fund)) %>%
mutate(fund = ifelse(fy < 2018 & fund %in% c("0818", "0767", "0671", "0593", "0578"), str_replace(fund, "0","9"), fund)) %>%
mutate(fund = ifelse(fy>1999 & fy < 2018 & fund == "0231", "10231", fund) ) %>%
mutate(fund = ifelse(fy < 2019 & fund %in% c("0161", "0489", "0500", "0612", "0893", "0766"), str_replace(fund, "0","9"), fund)) %>%
mutate(fund = ifelse(fy < 2020 & fund %in% c("0254", "0304", "0324", "0610", "0887", "0908", "0939", "0968"), str_replace(fund, "0","9"), fund)) %>%
mutate(fund = ifelse(fy < 2021 & fund %in% c("0255", "0325", "0348", "0967", "0972"), str_replace(fund, "0","9"), fund))
Note:
exp:1998_2022and thefunds_ab_in_2021dataframes have a fund_cat_name variable (AND THEY DONT MATCH 100%) which ends up creating a .x and .y version of the variable when they are joined together. Inspect this more later. It is not a huge concern because the fund number is what matters more.
funds_ab_in_2021 = read_excel("C:/Users/aleaw/OneDrive/Desktop/PhD Fall 2021 - Spring 2022/Merriman RA/Fiscal Futures FY2022/FY2021 replication/funds_ab_in_2021.xlsx")
exp_temp <- exp_1998_2022 %>%
arrange(fund, fy) %>%
filter(expenditure != 0) %>% # keeps everything that is not zero
# join funds_ab_in_2021 to exp_temp
left_join(funds_ab_in_2021, by = "fund") # matches most recent fund number and name
exp_1998_2022 and rev_1998_2022. These are
then saved as exp_temp and rev_temp while recoding variables. This is
BEFORE category groups are created and cleaned below. Only a temporary
file, do not use for analysis.# remove from computer memory to free up space (in case your computer needs it)
rm(allexpfiles)
rm(allrevfiles)
Aggregate expenditures: Save tax refunds as negative revenue. Code refunds to match the rev_type codes (02=income taxes, 03 = corporate income taxes, 06=sales tax, 09=motor fuel tax, 24=insurance taxes and fees, 35 = all other tax refunds)
## negative revenue becomes tax refunds
tax_refund_long <- exp_temp %>%
# fund != "0401" # removes State Trust Funds
filter(fund != "0401" & (object=="9910"|object=="9921"|object=="9923"|object=="9925")) %>%
# keeps these objects which represent revenue, insurance, treasurer,and financial and professional reg tax refunds
mutate(refund = case_when(
fund=="0278" & sequence == "00" ~ "02", # for income tax refund
fund=="0278" & sequence == "01" ~ "03", # tax administration and enforcement and tax operations become corporate income tax refund
fund == "0278" & sequence == "02" ~ "02",
object=="9921" ~ "21", # inheritance tax and estate tax refund appropriation
object=="9923" ~ "09", # motor fuel tax refunds
obj_seq_type == "99250055" ~ "06", # sales tax refund
fund=="0378" & object=="9925" ~ "24", # insurance privilege tax refund
fund=="0001" & object=="9925" ~ "35", #all other taxes
T ~ "CHECK")) # if none of the items above apply to the observations, then code them as 00
exp_temp <- left_join(exp_temp, tax_refund_long) %>%
mutate(refund = ifelse(is.na(refund),"not refund", as.character(refund)))
tax_refund <- tax_refund_long %>%
group_by(refund, fy)%>%
summarize(refund_amount = sum(expenditure, na.rm = TRUE)/1000000) %>%
pivot_wider(names_from = refund, values_from = refund_amount, names_prefix = "ref_") %>%
mutate_all(~replace_na(.,0)) %>%
arrange(fy)
exp_temp <- exp_temp %>% filter(refund == "not refund")
# remove the items we recoded in tax_refund_long
#exp_temp <- anti_join(exp_temp, tax_refund_long) # should be 156 fewer observations after antijoin
tax_refund will ultimately be removed from expenditure
totals and instead subtracted from revenue totals (since they were tax
refunds).
# early agencies replaced by successors
# recodes old agency numbers to consistent agency number
exp_temp <- exp_temp %>%
mutate(agency = case_when(
(agency=="438"| agency=="475" |agency == "505") ~ "440",
# financial institution & professional regulation &
# banks and real estate --> coded as financial and professional reg
agency == "473" ~ "588", # nuclear safety moved into IEMA
(agency =="531" | agency =="577") ~ "532", # coded as EPA
(agency =="556" | agency == "538") ~ "406", # coded as agriculture
agency == "560" ~ "592", # IL finance authority (fire trucks and agriculture stuff)to state fire marshal
agency == "570" & fund == "0011" ~ "494", # city of Chicago road fund to transportation
TRUE ~ (as.character(agency))))
State payments to the following pension systems:
• Teachers Retirement System (TRS)
• State Employee Retirement System (SERS)
• State University Retirement System (SURS)
• Judges Retirement System (JRS)
• General Assembly Retirement System (GARS)
“Operating costs of administering the pensions are not included in this category. Fiscal Futures only includes the state’s payments into the pension funds as”pension expenditures.” Note also that these payments are subtracted from reported agency spending in calculating other categories.”
obj_seq_type== “11600000” should NOT be included in pensions, correct?
why are local health insurance reserves included as pensions?
& fund != “0183” & appr_org != “55”
#special accounting of pension obligation bond (POB)-funded contributions to JRS, SERS, GARS, TRS
exp_temp <- exp_temp %>%
arrange(fund) %>%
mutate(pension = case_when(
# objects were weird for 2010 and 2011
(object=="4431" & fund=="0473" & (fy==2010 | fy==2011)) ~ 3, # teachers retirement system,
(object=="1298" & (fy==2010 | fy==2011) & (fund=="0477" | fund=="0479" | fund=="0481")) ~ 3, #judges retirement
(object=="4431" | (object>"1159" & object<"1166") ) ~ 1, # 4431 = easy to find pension items
# objects 1159 to 1166 are all considered Retirement by Comptroller
# object == 1167 also appears to be Other Retirement but isn't used yet
fund == "0825" ~ 4, # pension obligation acceleration bond
TRUE ~ 0))
table(exp_temp$pension)
##
## 0 1 3 4
## 154425 8814 8 8
# create file with all pension items to find any mistakes
exp_temp %>%
filter(pension > 0) %>%
write_csv("all_pensions.csv")
exp_temp %>%
filter(pension > 0)
exp_temp <- exp_temp %>%
# mutate(object = ifelse((pension == 3 & in_ff == "0"), "4431", object)) %>% # why this step?
mutate(pension = ifelse(pension ==1 & in_ff == "0", 2, pension)) %>% # coded as 2 if it was supposed to be excluded. Allows or checking work in between steps.
mutate(in_ff = ifelse((pension ==2 | pension ==3 | pension == 4), "1", in_ff))
table(exp_temp$pension)
##
## 0 1 2 3 4
## 154425 8666 148 8 8
Pension = 2 represents retirement pension payments that were excluded from the fiscal futures analysis by default ( in_ff was 0 because they were categorized as fund category I. State Trust Funds) but should be included and added to the revenue side under “Other Revenues” in later steps.
Summarizes the total expenditures for each pension code for each year.
#creates long version without any aggregation
pension_2_long <- exp_temp %>%
filter(pension == 2 ) %>%
rename(year = fy)
exp_temp <- anti_join(exp_temp, pension_2_long) # 148 observations removed with antijoin
pension2_fy22<- pension_2_long %>%
group_by(year) %>%
summarize(pension_amt = sum(expenditure)/1000000)
# all other pensions (1,3,4) codes get agency code 901 for State Pension Contributions
exp_temp <- exp_temp %>%
mutate(agency = ifelse(pension>0, "901", as.character(agency)),
agency_name = ifelse(agency == "901", "State Pension Contributions", as.character(agency_name)))
pension2_fy22 # used in final tables
pension_2_long %>% filter(fund == "0138" | fund == "0183" | fund == "0190" | fund == "0193") # should these be included?
Should the items above be considered pension expenditures? Currently they are coded as pension == 2 and added to revenues in later steps.
transfers_drop <- exp_temp %>% filter(
agency == "799" | # statutory transfers
object == "1993" | # interfund cash transfers
object == "1298") # purchase of investments
exp_temp <- anti_join(exp_temp, transfers_drop) # 13650 obs dropped with antijoin
Employer contributions for group insurance (contributions count as a revenue source).
Creates the employee healthcare costs amount to be added to the revenue side that would have been excluded due to being I. State Trust Funds and therefore in_ff=0 : Employer contributions are a revenue source and should be subtracted from state employee healthcare costs (expenditures + premiums = net costs).
Added line of code Sept. 21 2022: eehc = ifelse(obj_seq_type == “19000000” & wh_approp_name == “GROUP INSURANCE”, 1, eehc)) %>%
Should move group insurance that was categorized from central
management to employee health costs.
eehc = additional group insurance items under obj_seq_type == “11201000” in 2019 but smaller dollar amount
object 1900 is for Lump Sums and Other Purposes
# identifies eehc values that would have been excluded due to in_ff == 0 before recoding
exp_temp <- exp_temp %>%
mutate(eehc = ifelse(object == "1180", 1, 0)) %>%
mutate(eehc = ifelse(obj_seq_type == "19000000" & wh_approp_name == "GROUP INSURANCE" & fy > 2020, 1, eehc) ) %>%
# mutate(expenditure = ifelse(obj_seq_type == "19000000" & wh_approp_name == "GROUP INSURANCE" & fy > 2020, 0, expenditure) ) %>% # Francis's method 9.22.22
mutate(eehc = ifelse((eehc == 1 & in_ff =="0"), 2, eehc)) %>% # if eehc == 1 AND in_ff was zero, then recode eehc to 2, otherwise leave eehc as it was. Mostly helps flag things that would have been excluded due to default in_ff coding
mutate(in_ff = ifelse(eehc == 2, "1", in_ff) )
eehc_2_long <- exp_temp %>%
# recodes in_ff to 1 if eehc was coded to 2 to make sure they are included in fiscal futures.
filter(eehc == 2) # keeps only eehc == 2, items that would have been excluded based on in_ff original coding
# 146 observations
eehc_2_long
# summarizes by year totals for state employee healthcare costs == 2
eehc2_amt <- eehc_2_long %>% group_by(fy) %>%
summarize(eehc = sum(expenditure, na.rm = TRUE)/1000000)
# examine all eehc items in csv file to check mistakes
exp_temp %>%
mutate(eehc = ifelse(object == "1180", 1, 0)) %>%
mutate(eehc = ifelse(obj_seq_type == "19000000" & wh_approp_name == "GROUP INSURANCE" & fy > 2020, 1, eehc) ) %>%
mutate(eehc = ifelse((eehc == 1 & in_ff =="0"), 2, eehc)) %>%
filter(eehc >0) %>%
write_csv("all_eehc.csv")
exp_temp <- anti_join(exp_temp, eehc_2_long, by = c("fy", "fund", "fund_name", "agency", "agency_name", "appr_org", "org_name", "obj_seq_type", "appn_net_xfer", "expenditure", "data_source", "object", "category", "sequence", "type", "trans_agency", "trans_type", "wh_approp_name"))
# should remove the 146 eehc==2 observations from exp_temp
# 149451 - 146 = 149305 obs (expected value after antijoin)
State Employee Health Care = Sum of expenditures for “health care coverage as elected by members per state employees group insurance act.” The payments are made from the Health Insurance Reserve Fund. We subtract the share that came from employee contributions. Employee contributions are not considered a revenue source or an expenditure in our analysis.
exp_temp <- exp_temp %>%
mutate(agency = case_when( # turns specific items into State Employee Healthcare (agency=904)
fund=="0907" & (agency=="416" & appr_org=="20") ~ "904", # central management is agency 416
fund=="0907" & (agency=="478" & appr_org=="80") ~ "904", # agency = 478: healthcare & family services, stopped using this in 2012
fund=="0001" & appr_org=="20" & object=="1900" & agency=="416" & (fy>2002 & fy<2006) ~ "904",
fund=="0001" & appr_org=="20" & object=="1900" & agency=="416" & (fy>2020) ~ "904",
obj_seq_type == "19000000" & wh_approp_name == "GROUP INSURANCE" & fy > 2020 ~ "904",
TRUE ~ as.character(agency))) %>%
mutate(agency_name = ifelse(agency == "904", "STATE EMPLOYEE HEALTHCARE", as.character(agency_name)),
group = ifelse(agency == "904", "904", as.character(agency))) # creates group variable.
#Default group = agency number
Separate transfers to local from parent agencies come from DOR(492) or Transportation (494). Treats muni revenue transfers as expenditures, not negative revenue.
The share of certain taxes levied state-wide at a common rate and then transferred to local governments. (Purely local-option taxes levied by specific local governments with the state acting as collection agent are not included.)
The five corresponding revenue items are:
• Local share of Personal Income Tax
• Local share of General Sales Tax
• Personal Property Replacement Tax on Business Income
• Personal Property Replacement Tax on Public Utilities
• Local share of Motor Fuel Tax
exp_temp <- exp_temp %>% mutate(
agency = case_when(fund=="0515" & object=="4470" & type=="08" ~ "971", # income tax
fund=="0515" & object=="4491" & type=="08" & sequence=="00" ~ "971",
fund=="0802" & object=="4491" ~ "972", #pprt transfer
fund=="0515" & object=="4491" & type=="08" & sequence=="01" ~ "976", #gst to local
fund=="0627" & object=="4472"~ "976" ,
fund=="0648" & object=="4472" ~ "976",
fund=="0515" & object=="4470" & type=="00" ~ "976",
object=="4491" & (fund=="0188"|fund=="0189") ~ "976",
fund=="0187" & object=="4470" ~ "976",
fund=="0186" & object=="4470" ~ "976",
object=="4491" & (fund=="0413"|fund=="0414"|fund=="0415") ~ "975", #mft to local
TRUE ~ as.character(agency)),
agency_name = case_when(agency == "971"~ "INCOME TAX 1/10 TO LOCAL",
agency == "972" ~ "PPRT TRANSFER TO LOCAL",
agency == "976" ~ "GST TO LOCAL",
agency == "975" ~ "MFT TO LOCAL",
TRUE~as.character(agency_name)),
group = ifelse(agency>"970" & agency < "977", as.character(agency), as.character(group)))
table(exp_temp$group)
##
## 101 102 103 105 107 108 109 110 112 115 120 131 140
## 553 2 234 154 89 191 136 128 162 127 16 350 7
## 155 156 167 201 210 275 285 290 295 310 330 340 350
## 75 114 117 1337 14 362 233 458 1147 210 203 801 3926
## 360 370 402 406 416 418 420 422 425 426 427 440 442
## 1666 785 1755 4513 3885 2372 10718 9359 955 7247 772 3611 592
## 444 445 446 448 452 458 466 478 482 492 493 494 497
## 11061 21 1085 17 593 289 574 2951 5360 3993 1882 9260 2416
## 503 506 507 509 510 511 517 520 524 525 526 527 528
## 408 13 320 31 22 8812 127 4 1086 28 170 39 1810
## 529 532 534 537 540 541 542 546 548 554 555 557 558
## 18 5585 5 190 64 1305 172 808 253 25 24 204 269
## 559 562 563 564 565 567 568 569 571 574 575 576 578
## 245 18 676 15 182 172 2 438 65 76 85 1 223
## 579 580 583 585 586 587 588 589 590 591 592 593 598
## 423 316 20 43 5195 678 2612 550 165 187 1032 141 10
## 601 608 612 616 620 628 636 644 664 676 684 691 692
## 700 175 129 140 98 145 113 180 265 442 860 898 756
## 693 695 901 904 971 972 975 976
## 8 196 8676 52 24 24 72 1112
exp_temp <- exp_temp %>% filter(in_ff != 0) # drops in_ff = 0 funds AFTER dealing with net-revenue above
# 149305 obs to 145185 obs after filtering !=0
Principal and interest payment on both short-term and
long-term debt. We do not include escrow payments.
8813____ is for interest INCLUDE AS COST
8811____ is for principle EXCLUDE from analysis
8841____ is for escrow payments EXCLUDE from
analysis
8800____ is for tollway
Note: After feedback from GOMB and discussion with research team, we are no longer including principal payments as an expenditure. We do not count bond proceeds as revenue, and we do not count principal payments as a debt service expenditure.
# exp_temp <- exp_temp %>%
# # 8000 objects
# mutate(interest_pmt = if_else(object>"8810" & object<"8814", "903", as.character(agency)),
# agency_name = if_else(agency == "903", "DEBT SERVICE", as.character(agency_name)),
# group = if_else(agency == "903", "903", as.character(group)))
princ_pmt <- exp_temp %>%
filter(object == "8811" | object == "8841") # principal and escrow
exp_temp <- anti_join(exp_temp, princ_pmt) %>%
# mutate(group = if_else(object == "8813", "903", as.character(group)),
# agency = if_else(object == "8813", "903", as.character(agency_name)),
# agency_name = if_else(agency == "903", "DEBT SERVICE", as.character(agency_name)))
mutate(agency = if_else(object== "8813", "903", as.character(agency)),
agency_name = if_else(agency == "903", "DEBT SERVICE", as.character(agency_name)),
group = if_else(agency == "903", "903", as.character(group)))
# exp_temp <- exp_temp %>%
# #8813 is interest payments
# mutate(in_ff = if_else(agency == "370" & object == "8813", 1, 0)) %>%
# filter(in_ff == 1) %>%
# mutate(group = if_else(object == "8813", "903", as.character(group)))
exp_temp<- exp_temp %>%
#mutate(agency = as.numeric(agency) ) %>%
# arrange(agency)%>%
mutate(
group = case_when(
agency>"100"& agency<"200" ~ "910", # legislative
agency == "528" | (agency>"200" & agency<"300") ~ "920", # judicial
(agency>"309" & agency<"400") ~ "930", # elected officers
agency == "586" ~ "959", # create new K-12 group
agency=="402" | agency=="418" | agency=="478" | agency=="444" | agency=="482" ~ as.character(agency), # aging, CFS,HFS, human services, public health
T ~ as.character(group)),
#chip = ifelse(fund == "0001" & agency == "478" & appr_org == "65" &object=="4900" & (sequence == "20" | sequence == "54" | sequence == "61" | sequence == "62" | sequence == "65"),1 ,0)
) %>%
mutate(group = case_when(
agency=="478" & (appr_org=="01" | appr_org == "65" | appr_org=="88") & (object=="4900" | object=="4400") ~ "945", # separates CHIP from health and human services and saves it as Medicaid
agency == "586" & fund == "0355" ~ "478", # 586 (Board of Edu) has special education which is part of medicaid
#agency == "586" & appr_org == "18" ~ "945", # Spec. Edu Medicaid Matching
agency=="425" | agency=="466" | agency=="546" | agency=="569" | agency=="578" | agency=="583" | agency=="591" | agency=="592" | agency=="493" | agency=="588" ~ "941", # public safety & Corrections
agency=="420" | agency=="494" | agency=="406" | agency=="557" ~ as.character(agency), # econ devt & infra
agency=="511" | agency=="554" | agency=="574" | agency=="598" ~ "946", # Capital improvement
agency=="422" | agency=="532" ~ as.character(agency), # environment & nat. resources
agency=="440" | agency=="446" | agency=="524" | agency=="563" ~ "944", # business regulation
agency=="492" ~ "492", # revenue
agency == "416" ~ "416", # central management services
agency=="448" & fy > 2016 ~ "416", #add DoIT to central management
T ~ as.character(group))) %>%
mutate(group = case_when(
agency=="684" | agency=="691" ~ as.character(agency),
agency=="692" | agency=="695" | (agency>"599" & agency<"677") ~ "960", # higher education
agency=="427" ~ as.character(agency), # employment security
agency=="507"| agency=="442" | agency=="445" | agency=="452" |agency=="458" | agency=="497" ~ "948", # other departments
# other boards & Commissions
agency=="503" | agency=="509" | agency=="510" | agency=="565" |agency=="517" | agency=="525" | agency=="526" | agency=="529" | agency=="537" | agency=="541" | agency=="542" | agency=="548" | agency=="555" | agency=="558" | agency=="559" | agency=="562" | agency=="564" | agency=="568" | agency=="579" | agency=="580" | agency=="587" | agency=="590" | agency=="527" | agency=="585" | agency=="567" | agency=="571" | agency=="575" | agency=="540" | agency=="576" | agency=="564" | agency=="534" | agency=="520" | agency=="506" | agency == "533" ~ "949",
# non-pension expenditures of retirement funds moved to "Other Departments"
agency=="131" | agency=="275" | agency=="589" |agency=="593"|agency=="594"|agency=="693" ~ "948",
T ~ as.character(group))) %>%
mutate(group_name =
case_when(
group == "900" ~ "NOT IN FRAME",
group == "901" ~ "STATE PENSION CONTRIBUTION",
group == "903" ~ "DEBT SERVICE",
group == "910" ~ "LEGISLATIVE" ,
group == "920" ~ "JUDICIAL" ,
group == "930" ~ "ELECTED OFFICERS" ,
group == "940" ~ "OTHER HEALTH-RELATED",
group == "941" ~ "PUBLIC SAFETY" ,
group == "942" ~ "ECON DEVT & INFRASTRUCTURE" ,
group == "943" ~ "CENTRAL SERVICES",
group == "944" ~ "BUS & PROFESSION REGULATION" ,
group == "945" ~ "MEDICAID" ,
group == "946" ~ "CAPITAL IMPROVEMENT" ,
group == "948" ~ "OTHER DEPARTMENTS" ,
group == "949" ~ "OTHER BOARDS & COMMISSIONS" ,
group == "959" ~ "K-12 EDUCATION" ,
group == "960" ~ "UNIVERSITY EDUCATION" ,
group == agency ~ as.character(group),
TRUE ~ "Check name"),
year = fy)
table(exp_temp$group)
##
## 402 406 416 418 420 422 426 427 444 478 482 492 494
## 1755 4461 3571 2372 10708 9354 7245 772 11030 1596 5359 3251 9250
## 532 557 684 691 901 903 904 910 920 930 941 944 945
## 5543 204 860 875 8676 54 52 2105 4972 7060 8702 6338 842
## 946 948 949 959 960 971 972 975 976
## 8865 4359 5498 5147 3004 24 24 72 1112
# number of observations within each group category
table(exp_temp$group_name)
##
## 402 406
## 1755 4461
## 416 418
## 3554 2372
## 420 422
## 10708 9354
## 426 427
## 7245 772
## 444 478
## 11030 1592
## 482 492
## 5359 3251
## 494 532
## 9250 5543
## 557 684
## 204 860
## 691 904
## 875 52
## 971 972
## 24 24
## 975 976
## 72 1112
## BUS & PROFESSION REGULATION CAPITAL IMPROVEMENT
## 6338 8865
## Check name DEBT SERVICE
## 21 54
## ELECTED OFFICERS JUDICIAL
## 7060 4972
## K-12 EDUCATION LEGISLATIVE
## 5147 2105
## MEDICAID OTHER BOARDS & COMMISSIONS
## 842 5498
## OTHER DEPARTMENTS PUBLIC SAFETY
## 4359 8702
## STATE PENSION CONTRIBUTION UNIVERSITY EDUCATION
## 8676 3004
transfers_long <- exp_temp %>%
filter(group == "971" |group == "972" | group == "975" | group == "976")
transfers <- transfers_long %>%
group_by(year, group ) %>%
summarize(sum_expenditure = sum(expenditure)/1000000) %>%
pivot_wider(names_from = "group", values_from = "sum_expenditure", names_prefix = "exp_" )
exp_temp <- anti_join(exp_temp, transfers_long)
# write_csv(exp_temp, "all_expenditures_recoded.csv")
All expenditures recoded but not aggregated: Allows for inspection of individual expenditures within larger categories. This stage of the data is extremely useful for investigating almost all questions we have about the data.
Note that these are the raw figures BEFORE we take the additional steps:
exp_temp %>%
group_by(year, group) %>%
summarize(sum_expenditure = sum(expenditure)/1000000) %>%
arrange(year) %>%
pivot_wider(names_from = "group", values_from = "sum_expenditure")
aggregate_exp_labeled <- exp_temp %>%
group_by(year, group_name) %>%
summarize(sum_expenditure = sum(expenditure)/1000000) %>%
arrange(year) %>%
pivot_wider(names_from = "group_name", values_from = "sum_expenditure")
aggregate_exp_labeled
For aggregating revenue, use the rev_1998_2022 dataframe, join the funds_ab_in_2021 file to it, and then join the ioc_source_type file to the dataset.
You need to update the ioc_source_type file every year!
include how to do that later
# fund info to revenue for all years
rev_temp <- inner_join(rev_1998_2022, funds_ab_in_2021, by = "fund") %>% arrange(source)
# need to update the ioc_source_type file every year!
ioc_source_type <- read_dta("C:/Users/aleaw/OneDrive/Desktop/PhD Fall 2021 - Spring 2022/Merriman RA/Fiscal Futures FY2022/FY2021 replication/ioc_source_updated21.dta")
rev_temp <- left_join(rev_temp, ioc_source_type, by = "source")
# automatically used source, source name does not match for the join to work using source_name
rev_temp <- rev_temp %>%
mutate(
rev_type = ifelse(rev_type=="57" & agency=="478" & (source=="0618"|source=="2364"|source=="0660"|source=="1552"| source=="2306"| source=="2076"|source=="0676"|source=="0692"), "58", rev_type),
rev_type_name = ifelse(rev_type=="58", "FEDERAL TRANSPORTATION", rev_type_name),
rev_type = ifelse(rev_type=="57" & agency=="494", "59", rev_type),
rev_type_name = ifelse(rev_type=="59", "FEDERAL TRANSPORTATION", rev_type_name),
rev_type_name = ifelse(rev_type=="57", "FEDERAL OTHER", rev_type_name),
rev_type = ifelse(rev_type=="6", "06", rev_type),
rev_type = ifelse(rev_type=="9", "09", rev_type))
rev_temp %>%
group_by(fy, rev_type_name) %>%
summarise(receipts = sum(receipts, na.rm = TRUE)/1000000)
#collect optional premiums to fund 0907 for use in eehc expenditure
rev_temp <- rev_temp %>%
mutate(med_option_recent = ifelse(
fund=="0907" & (source=="0120"| source=="0121"| (source>"0345" & source<"0357")|(source>"2199" & source<"2209")), 1, 0),
# adds more rev_types
rev_type = case_when(
fund =="0427" ~ "12", # pub utility tax
fund == "0742" | fund == "0473" ~ "24", # insurance and fees
fund == "0976" ~ "36",# receipts from rev producing
fund == "0392" |fund == "0723" ~ "39", # licenses and fees
fund == "0656" ~ "78", #all other rev sources
TRUE ~ as.character(rev_type)))
#if not mentioned, then rev_type as it was
# optional insurance premiums
med_option_recent <- rev_temp %>%
group_by(fy, med_option_recent) %>%
summarize(med_option_amt_recent = sum(receipts)/1000000) %>%
filter(med_option_recent == 1) %>%
rename(year = fy) %>%
select(-med_option_recent)
med_option_long <- rev_temp %>% filter(med_option_recent == 1)
# 361 observations have med_option_recent == 1
med_option_long %>%
group_by(fy, med_option_recent) %>%
summarize(med_option_amt_recent = sum(receipts)/1000000) %>%
rename(year = fy) %>%
select(-med_option_recent)
rev_temp <- rev_temp %>% filter(med_option_recent != 1)
Still need to add med_option data to Other Revenues
rev_temp <- rev_temp %>%
filter(in_ff == 1) %>%
mutate(local = ifelse(is.na(local), 0, local)) %>%
filter(local != 1)
in_from_out <- c("0847", "0867", "1175", "1176", "1177", "1178", "1181", "1182", "1582", "1592", "1745", "1982", "2174", "2264")
rev_temp <- rev_temp %>%
mutate(rev_type_new = ifelse(source %in% in_from_out, "76", rev_type))
# if source contains any of the codes in in_from_out, code them as 76 (all other rev).
# revenue types to drop
drop_type <- c("32", "45", "51", "66", "72", "75", "79", "98")
# drops Blank, Student Fees, Retirement contributions, proceeds/investments,
# bond issue proceeds, interagency receipts, cook IGT, Prior year refunds.
rev_temp <- rev_temp %>% filter(!rev_type_new %in% drop_type)
# keep observations that do not have a revenue type mentioned in drop_type
table(rev_temp$rev_type_new)
##
## 02 03 06 09 12 15 18 21 24 27 30 31 33
## 144 116 803 120 556 247 43 1419 428 73 653 118 119
## 35 36 39 42 48 54 57 58 59 60 63 76 78
## 629 4978 8628 2569 30 1196 6215 590 219 102 4847 149 10451
## 99
## 756
rev_temp %>%
group_by(fy, rev_type_new) %>%
summarize(total_reciepts = sum(receipts)/1000000) %>%
pivot_wider(names_from = rev_type_new, values_from = total_reciepts, names_prefix = "rev_")
# combines smallest 4 categories to to "Other"
# they were the 4 smallest in past years, are they still the 4 smallest?
rev_temp <- rev_temp %>%
mutate(rev_type_new = ifelse(rev_type=="30" | rev_type=="60" | rev_type=="63" | rev_type=="76" | rev_type=="78" , "78", rev_type_new))
#table(rev_temp$rev_type_new) # check work
rm(rev_1998_2022)
rm(exp_1998_2022)
State employer contributions (eehc from eehc2_amt) should be moved to Other revenues.
State pension contributions (pension_amt from pension2_fy22)
should be added to Other revenues.
Local Government Transfers (exp_970) should be on the expenditure side.
Subtract employee insurance premiums from 904 (State Employee Healthcare Expenditures - Employee Premiums = Actual state healthcare costs. Subtract med_option_amt_recent in med_option_recent from exp_904 in ff_exp).
ff_rev <- rev_temp %>%
group_by(rev_type_new, fy) %>%
summarize(sum_receipts = sum(receipts, na.rm=TRUE)/1000000 ) %>%
pivot_wider(names_from = "rev_type_new", values_from = "sum_receipts", names_prefix = "rev_")
ff_rev<- left_join(ff_rev, tax_refund)
ff_rev <- left_join(ff_rev, pension2_fy22, by=c("fy" = "year"))
ff_rev <- left_join(ff_rev, eehc2_amt)
ff_rev <- mutate_all(ff_rev, ~replace_na(.,0))
ff_rev <- ff_rev %>%
mutate(rev_02 = rev_02 - ref_02,
rev_03 = rev_03 - ref_03,
rev_06 = rev_06 - ref_06,
rev_09 = rev_09 - ref_09,
rev_21 = rev_21 - ref_21,
rev_24 = rev_24 - ref_24,
rev_35 = rev_35 - ref_35,
rev_78new = rev_78 + pension_amt + eehc
) %>%
select(-c(ref_02:ref_35, rev_76, rev_78, rev_99, rev_NA, pension_amt, eehc))
ff_rev
Since I already pivot_wider()ed the table in the previous code chunk, I now change each column’s name by using rename() to set new variable names. Ideally the final dataframe would have both the variable name and the variable label but I have not done that yet.
aggregate_rev_labels <- ff_rev %>%
rename("INDIVIDUAL INCOME TAXES, gross of local, net of refunds" = rev_02,
"CORPORATE INCOME TAXES, gross of PPRT, net of refunds" = rev_03,
"SALES TAXES, gross of local share" = rev_06 ,
"MOTOR FUEL TAX, gross of local share, net of refunds" = rev_09 ,
"PUBLIC UTILITY TAXES, gross of PPRT" = rev_12,
"CIGARETTE TAXES" = rev_15 ,
"LIQUOR GALLONAGE TAXES" = rev_18,
"INHERITANCE TAX" = rev_21,
"INSURANCE TAXES&FEES&LICENSES, net of refunds" = rev_24 ,
"CORP FRANCHISE TAXES & FEES" = rev_27,
# "HORSE RACING TAXES & FEES" = rev_30, # in Other
"MEDICAL PROVIDER ASSESSMENTS" = rev_31 ,
# "GARNISHMENT-LEVIES " = rev_32 , # dropped
"LOTTERY RECEIPTS" = rev_33 ,
"OTHER TAXES" = rev_35,
"RECEIPTS FROM REVENUE PRODUCNG" = rev_36,
"LICENSES, FEES & REGISTRATIONS" = rev_39 ,
"MOTOR VEHICLE AND OPERATORS" = rev_42 ,
# "STUDENT FEES-UNIVERSITIES" = rev_45, # dropped
"RIVERBOAT WAGERING TAXES" = rev_48 ,
# "RETIREMENT CONTRIBUTIONS " = rev_51, # dropped
"GIFTS AND BEQUESTS" = rev_54,
"FEDERAL OTHER" = rev_57 ,
"FEDERAL MEDICAID" = rev_58,
"FEDERAL TRANSPORTATION" = rev_59 ,
# "OTHER GRANTS AND CONTRACTS" = rev_60, #other
# "INVESTMENT INCOME" = rev_63, # other
# "PROCEEDS,INVESTMENT MATURITIES" = rev_66 , #dropped
# "BOND ISSUE PROCEEDS" = rev_72, #dropped
# "INTER-AGENCY RECEIPTS" = rev_75, #dropped
# "TRANSFER IN FROM OUT FUNDS" = rev_76, #other
"ALL OTHER SOURCES" = rev_78new ,
# "COOK COUNTY IGT" = rev_79, #dropped
# "PRIOR YEAR REFUNDS" = rev_98 #dropped
)
aggregate_rev_labels
# Still contains columns that should be dropped for the clean final aggregate table. Drop the variables I don't want in the output table in the "graphs" section.
Create state employee healthcare costs that reflects the health costs minus the optional insurance premiums that came in (904_new = 904 - med_option_amt_recent).
Create exp_970 for all local government transfers (exp_971 + exp_972 + exp_975 + exp_976).
ff_exp <- exp_temp %>%
group_by(fy, group) %>%
summarize(sum_expenditures = sum(expenditure, na.rm=TRUE)/1000000 ) %>%
pivot_wider(names_from = "group", values_from = "sum_expenditures", names_prefix = "exp_")%>%
# join state employee healthcare and subtract employee premiums
left_join(med_option_recent, by = c("fy" = "year")) %>%
mutate(exp_904_new = exp_904 - med_option_amt_recent) %>% # state employee healthcare
# join local transfers and create exp_970
left_join(transfers, by = c("fy" = "year")) %>%
mutate(exp_970 = exp_971 + exp_972 + exp_975 + exp_976)
ff_exp<- ff_exp %>% select(-c(exp_904, med_option_amt_recent, exp_971:exp_976)) # drop unwanted columns
ff_exp
Create total revenues and total expenditures only:
rev_long and exp_long, expenditures
and revenues are in the same format and can be combined together for the
totals and gap each year.rev_long <- pivot_longer(ff_rev, rev_02:rev_78new, names_to = c("type","Category"), values_to = "Dollars", names_sep = "_") %>%
rename(Year = fy) %>%
mutate(Category_name = case_when(
Category == "02" ~ "INDIVIDUAL INCOME TAXES, gross of local, net of refunds" ,
Category == "03" ~ "CORPORATE INCOME TAXES, gross of PPRT, net of refunds" ,
Category == "06" ~ "SALES TAXES, gross of local share" ,
Category == "09" ~ "MOTOR FUEL TAX, gross of local share, net of refunds" ,
Category == "12" ~ "PUBLIC UTILITY TAXES, gross of PPRT" ,
Category == "15" ~ "CIGARETTE TAXES" ,
Category == "18" ~ "LIQUOR GALLONAGE TAXES" ,
Category == "21" ~ "INHERITANCE TAX" ,
Category == "24" ~ "INSURANCE TAXES&FEES&LICENSES, net of refunds " ,
Category == "27" ~ "CORP FRANCHISE TAXES & FEES" ,
Category == "30" ~ "HORSE RACING TAXES & FEES", # in Other
Category == "31" ~ "MEDICAL PROVIDER ASSESSMENTS" ,
Category == "32" ~ "GARNISHMENT-LEVIES" , # dropped
Category == "33" ~ "LOTTERY RECEIPTS" ,
Category == "35" ~ "OTHER TAXES" ,
Category == "36" ~ "RECEIPTS FROM REVENUE PRODUCNG",
Category == "39" ~ "LICENSES, FEES & REGISTRATIONS" ,
Category == "42" ~ "MOTOR VEHICLE AND OPERATORS" ,
Category == "45" ~ "STUDENT FEES-UNIVERSITIES", # dropped
Category == "48" ~ "RIVERBOAT WAGERING TAXES" ,
Category == "51" ~ "RETIREMENT CONTRIBUTIONS" , # dropped
Category == "54" ~ "GIFTS AND BEQUESTS",
Category == "57" ~ "FEDERAL OTHER" ,
Category == "58" ~ "FEDERAL MEDICAID",
Category == "59" ~ "FEDERAL TRANSPORTATION" ,
Category == "60" ~ "OTHER GRANTS AND CONTRACTS", #other
Category == "63" ~ "INVESTMENT INCOME", # other
Category == "66" ~ "PROCEEDS,INVESTMENT MATURITIES" , #dropped
Category == "72" ~ "BOND ISSUE PROCEEDS", #dropped
Category == "75" ~ "INTER-AGENCY RECEIPTS ", #dropped
Category == "76" ~ "TRANSFER IN FROM OUT FUNDS", #other
Category == "78new" ~ "ALL OTHER SOURCES" ,
Category == "79" ~ "COOK COUNTY IGT", #dropped
Category == "98" ~ "PRIOR YEAR REFUNDS", #dropped
T ~ "Check Me!"
) )
exp_long <- pivot_longer(ff_exp, exp_402:exp_970 , names_to = c("type", "Category"), values_to = "Dollars", names_sep = "_") %>%
rename(Year = fy ) %>%
mutate(Category_name =
case_when(
Category == "402" ~ "AGING" ,
Category == "406" ~ "AGRICULTURE",
Category == "416" ~ "CENTRAL MANAGEMENT",
Category == "418" ~ "CHILDREN AND FAMILY SERVICES",
Category == "420" ~ "COMMERCE AND ECONOMIC OPPORTUNITY",
Category == "422" ~ "NATURAL RESOURCES" ,
Category == "426" ~ "CORRECTIONS",
Category == "427" ~ "EMPLOYMENT SECURITY" ,
Category == "444" ~ "HUMAN SERVICES" ,
Category == "448" ~ "Innovation and Technology", # AWM added fy2022
Category == "478" ~ "HEALTHCARE & FAM SER NET OF MEDICAID",
Category == "482" ~ "PUBLIC HEALTH",
Category == "492" ~ "REVENUE",
Category == "494" ~ "TRANSPORTATION" ,
Category == "532" ~ "ENVIRONMENTAL PROTECT AGENCY" ,
Category == "557" ~ "IL STATE TOLL HIGHWAY AUTH" ,
Category == "684" ~ "IL COMMUNITY COLLEGE BOARD",
Category == "691" ~ "IL STUDENT ASSISTANCE COMM" ,
Category == "900" ~ "NOT IN FRAME",
Category == "901" ~ "STATE PENSION CONTRIBUTION",
Category == "903" ~ "DEBT SERVICE",
Category == "904" ~ "State Employee Healthcare",
Category == "910" ~ "LEGISLATIVE" ,
Category == "920" ~ "JUDICIAL" ,
Category == "930" ~ "ELECTED OFFICERS" ,
Category == "940" ~ "OTHER HEALTH-RELATED",
Category == "941" ~ "PUBLIC SAFETY" ,
Category == "942" ~ "ECON DEVT & INFRASTRUCTURE" ,
Category == "943" ~ "CENTRAL SERVICES",
Category == "944" ~ "BUS & PROFESSION REGULATION" ,
Category == "945" ~ "MEDICAID" ,
Category == "946" ~ "CAPITAL IMPROVEMENT" ,
Category == "948" ~ "OTHER DEPARTMENTS" ,
Category == "949" ~ "OTHER BOARDS & COMMISSIONS" ,
Category == "959" ~ "K-12 EDUCATION" ,
Category == "960" ~ "UNIVERSITY EDUCATION",
Category == "970" ~ "Local Govt Transfers",
T ~ "CHECK ME!")
)
# write_csv(exp_long, "expenditures_recoded_long_FY22.csv")
# write_csv(rev_long, "revenue_recoded_long_FY22.csv")
aggregated_totals_long <- rbind(rev_long, exp_long)
aggregated_totals_long
year_totals <- aggregated_totals_long %>%
group_by(type, Year) %>%
summarize(Dollars = sum(Dollars, na.rm = TRUE)) %>%
pivot_wider(names_from = "type", values_from = Dollars) %>%
rename(
Expenditures = exp,
Revenue = rev) %>%
mutate(Gap = Revenue - Expenditures)
# creates variable for the Gap each year
year_totals
# write_csv(aggregated_totals_long, "aggregated_totals.csv")
Graphs made from aggregated_totals_long dataframe.
aggregated_totals_long %>%
filter(type == "exp") %>% # uses only expenditures
ggplot(aes(x = Year, y = Dollars, group = Category)) +
geom_line()+
xlab("Year") +
ylab("Millions of Dollars") +
ggtitle("Illinois Expenditures by Category")
aggregated_totals_long %>%
filter(type == "rev") %>% #uses only revenues
ggplot(aes(x = Year, y = Dollars, group = Category, label = Category_name)) +
geom_line()+
xlab("Year") +
ylab("Millions of Dollars") +
ggtitle("Illinois Revenues by Category")
year_totals %>%
ggplot() +
# geom_smooth adds regression line, graphed first so it appears behind line graph
geom_smooth(aes(x = Year, y = Revenue), color = "light green", method = "lm", se = FALSE) +
geom_smooth(aes(x = Year, y = Expenditures), color = "gray", method = "lm", se = FALSE) +
# line graph of revenue and expenditures
geom_line(aes(x = Year, y = Revenue), color = "green4") +
geom_line(aes(x = Year, y = Expenditures), color = "black") +
# labels
theme_bw() +
scale_y_continuous(labels = comma)+
xlab("Year") +
ylab("Millions of Dollars") +
ggtitle("Illinois Expenditures and Revenue Totals, 1998-2022")
Expenditure and revenue amounts in millions of dollars, with and without labels:
exp_long %>%
filter(Year == 2021) %>%
#mutate(`Total Expenditures`= sum(Dollars, na.rm = TRUE)) %>%
# select(-c(Year, `Total Expenditures`)) %>%
arrange(desc(`Dollars`)) %>%
ggplot() +
geom_col(aes(x = fct_reorder(Category_name, `Dollars`), y = `Dollars`))+
coord_flip() +
xlab("Expenditure Categories") +
ylab("Millions of Dollars") +
theme_bw()
exp_long %>%
filter(Year == 2021) %>%
#mutate(`Total Expenditures`= sum(Dollars, na.rm = TRUE)) %>%
# select(-c(Year, `Total Expenditures`)) %>%
arrange(desc(`Dollars`)) %>%
ggplot() +
geom_col(aes(x = fct_reorder(Category, `Dollars`), y = `Dollars`))+
coord_flip() +
xlab("Expenditure Categories") +
ylab("Millions of Dollars") +
theme_bw()
rev_long %>%
filter(Year == 2021) %>%
#mutate(`Total Expenditures`= sum(Dollars, na.rm = TRUE)) %>%
# select(-c(Year, `Total Expenditures`)) %>%
arrange(desc(`Dollars`)) %>%
ggplot() +
geom_col(aes(x = fct_reorder(Category_name, `Dollars`), y = `Dollars`))+
coord_flip() +
xlab("Revenue Categories") +
ylab("Millions of Dollars") +
theme_bw()
rev_long %>%
filter(Year == 2021) %>%
#mutate(`Total Expenditures`= sum(Dollars, na.rm = TRUE)) %>%
# select(-c(Year, `Total Expenditures`)) %>%
arrange(desc(`Dollars`)) %>%
ggplot() +
geom_col(aes(x = fct_reorder(Category, `Dollars`), y = `Dollars`))+
coord_flip() +
xlab("Revenue Categories") +
ylab("Millions of Dollars") +
theme_bw()
Expenditure and revenues when focusing on largest categories and combining others into “All Other Expenditures(Revenues)”:
exp_long %>%
filter( Year == 2021) %>%
mutate(rank = rank(Dollars),
Category_name = ifelse(rank > 13, Category_name, 'All Other Expenditures')) %>%
# select(-c(Year, Dollars, rank)) %>%
arrange(desc(Dollars)) %>%
ggplot() +
geom_col(aes(x = fct_reorder(Category_name, `Dollars`), y = `Dollars`), fill = "light green")+
coord_flip() +
xlab("") +
theme_bw()
rev_long %>%
filter( Year == 2021) %>%
mutate(rank = rank(Dollars),
Category_name = ifelse(rank > 10, Category_name, 'All Other Expenditures')) %>%
# select(-c(Year, Dollars, rank)) %>%
arrange(desc(Dollars)) %>%
ggplot() +
geom_col(aes(x = fct_reorder(Category_name, `Dollars`), y = `Dollars`), fill = "light blue")+
coord_flip() +
xlab("") +
theme_bw()
Keeping the top 13 categories and grouping the rest to All Other Expenditures(Revenues). Shown as a percentage of total expenditures(revenues)
exp_long %>%
filter( Year == 2021) %>%
mutate(`Total Expenditures` = sum(Dollars, na.rm = TRUE),
`Percent of Total Expenditures` = round((Dollars / `Total Expenditures`*100), 2),
rank = rank(-Dollars),
Category = ifelse(rank <= 13, Category, 'All Other Expenditures')) %>%
select(-c(Year, `Total Expenditures`, rank)) %>%
arrange(desc(`Percent of Total Expenditures`)) %>%
ggplot() +
geom_col(aes(x = fct_reorder(Category, `Percent of Total Expenditures`), y = `Percent of Total Expenditures`), fill = "light green")+
coord_flip() +
xlab("") +
ylab("Percent of Total Expenditure") +
theme_bw()
exp_long %>%
filter( Year == 2021) %>%
mutate(`Total Expenditures` = sum(Dollars, na.rm = TRUE),
`Percent of Total Expenditures` = round((Dollars / `Total Expenditures`*100), 2),
rank = rank(-Dollars),
Category_name = ifelse(rank <= 13, Category_name, 'All Other Expendiures')) %>%
select(-c(Year, `Total Expenditures`, rank)) %>%
arrange(desc(`Percent of Total Expenditures`)) %>%
ggplot() +
geom_col(aes(x = fct_reorder(Category_name, `Percent of Total Expenditures`), y = `Percent of Total Expenditures`), fill = "light green")+
coord_flip() +
xlab("")+
ylab("Percent of Total Expenditure") +
theme_bw()
Each year, you will need to update the CAGR formulas!
calc_cagr is a function created for calculating the
CAGRs for different spans of time.
# function for calculating the CAGR
calc_cagr <- function(df, n) {
df <- exp_long %>%
select(-type) %>%
arrange(Category_name, Year) %>%
group_by(Category_name) %>%
mutate(cagr = ((`Dollars` / lag(`Dollars`, n)) ^ (1 / n)) - 1)
return(df)
}
# This works for one variable at a time
cagr_23 <- calc_cagr(exp_long, 23) %>%
# group_by(Category) %>%
summarize(cagr_23 = round(sum(cagr*100, na.rm = TRUE), 2))
cagr23_precovid <- exp_long %>%
filter(Year <= 2019) %>%
calc_cagr(21) %>%
summarize(cagr_21 = round(sum(cagr*100, na.rm = TRUE), 2))
cagr_10 <- calc_cagr(exp_long, 10) %>%
filter(Year == 2021) %>%
summarize(cagr_10 = case_when(Year == 2021 ~ round(sum(cagr*100, na.rm = TRUE), 2)))
cagr_5 <- calc_cagr(exp_long, 5) %>%
filter(Year == 2021) %>%
summarize(cagr_5 = case_when(Year == 2021 ~ round(sum(cagr*100, na.rm = TRUE), 2)))
cagr_3 <- calc_cagr(exp_long, 3) %>%
filter(Year == 2021) %>%
summarize(cagr_3 = case_when(Year == 2021 ~ round(sum(cagr*100, na.rm = TRUE), 2)))
cagr_2 <- calc_cagr(exp_long, 2) %>%
filter(Year == 2021) %>%
summarize(cagr_2 = case_when(Year == 2021 ~ round(sum(cagr*100, na.rm = TRUE), 2)))
cagr_1 <- calc_cagr(exp_long, 1) %>%
filter(Year == 2021) %>%
summarize(cagr_1 = case_when(Year == 2021 ~ round(sum(cagr*100, na.rm = TRUE), 2)))
CAGR_expenditures_summary <- data.frame(cagr_1, cagr_2, cagr_3, cagr_5, cagr_10, cagr_23 ) %>%
select(-c(Category_name.1, Category_name.2, Category_name.3, Category_name.4, Category_name.5 )) %>%
rename("Expenditure Category" = Category_name, "1 Year CAGR" = cagr_1, "2 Year CAGR" = cagr_2, "3 Year CAGR" = cagr_3, "5 Year CAGR" = cagr_5, "10 Year CAGR" = cagr_10,"23 Year CAGR" = cagr_23 )
CAGR_expenditures_summary
# to have it as a csv, uncomment the line below
write_csv(CAGR_expenditures_summary, "CAGR_expenditures_summary.csv")
calc_cagr <- function(df, n) {
df <- rev_long %>%
arrange(Category_name, Year) %>%
group_by(Category_name) %>%
mutate(cagr = ((Dollars / lag(Dollars, n)) ^ (1 / n)) - 1)
return(df)
}
# This works for one variable at a time
cagr_23 <- calc_cagr(rev_long, 23) %>%
# group_by(Category) %>%
summarize(cagr_23 = round(sum(cagr*100, na.rm = TRUE), 2))
cagr_10 <- calc_cagr(rev_long, 10) %>%
filter(Year == 2021) %>%
summarize(cagr_10 = case_when(Year == 2021 ~ round(sum(cagr*100, na.rm = TRUE), 2)))
cagr_5 <- calc_cagr(rev_long, 5) %>%
filter(Year == 2021) %>%
summarize(cagr_5 = case_when(Year == 2021 ~ round(sum(cagr*100, na.rm = TRUE), 2)))
cagr_3 <- calc_cagr(rev_long, 3) %>%
filter(Year == 2021) %>%
summarize(cagr_3 = case_when(Year == 2021 ~ round(sum(cagr*100, na.rm = TRUE), 2)))
cagr_2 <- calc_cagr(rev_long, 2) %>%
filter(Year == 2021) %>%
summarize(cagr_2 = case_when(Year == 2021 ~ round(sum(cagr*100, na.rm = TRUE), 2)))
cagr_1 <- calc_cagr(rev_long, 1) %>%
filter(Year == 2021) %>%
summarize(cagr_1 = case_when(Year == 2021 ~ round(sum(cagr*100, na.rm = TRUE), 2)))
CAGR_revenue_summary <- data.frame(cagr_1, cagr_2, cagr_3, cagr_5, cagr_10, cagr_23) %>%
select(-c(Category_name.1, Category_name.2, Category_name.3, Category_name.4, Category_name.5 )) %>%
rename("Revenue Category" = Category_name, "1 Year CAGR" = cagr_1, "2 Year CAGR" = cagr_2, "3 Year CAGR" = cagr_3, "5 Year CAGR" = cagr_5, "10 Year CAGR" = cagr_10,"23 Year CAGR" = cagr_23 )
CAGR_revenue_summary
# to have it as a csv, uncomment the line below
write_csv(CAGR_revenue_summary, "CAGR_revenue_summary.csv")
rm(cagr_1, cagr_2, cagr_3, cagr_5, cagr_10, cagr_23)
Expenditure and Revenue Growth using a lag formula:
exp_long %>%
group_by(Category_name) %>%
mutate(Growth = ((Dollars) - lag(Dollars))/lag(Dollars) *100) %>%
summarize(Growth = round(mean(Growth, na.rm = TRUE), 2))
rev_long %>%
group_by(Category_name) %>%
mutate(Growth = ((Dollars) - lag(Dollars))/lag(Dollars) *100) %>%
summarize(Growth = round(mean(Growth, na.rm = TRUE), 2))
Final column not done yet
revenue_change <- rev_long %>%
select(-c(type,Category)) %>%
filter(Year > 2019) %>%
pivot_wider(names_from = Year , values_from = Dollars, names_prefix = "Dollars_") %>%
mutate("Change from 2020 to 2021" = Dollars_2021 - Dollars_2020,
"Percent Change from 2020 to 2021" = (Dollars_2021 -Dollars_2020)/Dollars_2020) %>%
left_join(CAGR_revenue_summary, by = c("Category_name" = "Revenue Category")) %>%
select(-c(Dollars_2020,`1 Year CAGR`:`10 Year CAGR`))
revenue_change
expenditure_change <- exp_long %>%
select(-c(type,Category)) %>%
filter(Year > 2019) %>%
pivot_wider(names_from = Year , values_from = Dollars, names_prefix = "Dollars_") %>%
mutate("Change from 2020 to 2021" = Dollars_2021 - Dollars_2020,
"Percent Change from 2020 to 2021" = (Dollars_2021 -Dollars_2020)/Dollars_2020) %>%
left_join(CAGR_expenditures_summary, by = c("Category_name" = "Expenditure Category")) %>%
select(-c(Dollars_2020,`1 Year CAGR`:`10 Year CAGR`))
expenditure_change
Saves main items in one excel file named
summary_file.xlsx. Delete eval=FALSE to run on
local computer.
#install.packages("openxlsx")
library(openxlsx)
dataset_names <- list('rev_long' = rev_long, 'exp_long' = exp_long,
`Table 1` = expenditure_change, `Table 2` = revenue_change,
'Table 4.a' = CAGR_revenue_summary, 'Table 4.b' = CAGR_expenditures_summary,
'year_totals' = year_totals)
write.xlsx(dataset_names, file = 'summary_file_AWM_v2.xlsx')